#!/usr/bin/env python3

"""
This script is used to convert the output of dwarfdump into a file that is easy
to load into lfortran in order to look up filenames and line numbers for a
given address.
Here is how to use it:

        cd src/bin
        llvm-dwarfdump --debug-line lfortran.dSYM > lfortran.dSYM/symbols.txt
        ./dwarf_convert.py lfortran.dSYM/symbols.txt lfortran.dSYM/lines.txt lfortran.dSYM/lines.dat

This is meant to be executed at build time.

A better solution would be to use the `dwarf` library directly from C++ and
generate the same output directly. Here is the source code of llvm-dwarfdump:

https://github.com/llvm/llvm-project/blob/91a6ad5ad887a16e361338303d4ff3d29dba5e10/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp

We just have to do exactly what it does, but generate the output in the format
of lines.txt and lines.dat

"""

from collections import namedtuple
from glob import glob
import os
import re
from struct import pack
import sys

# Records produced by Parser: a DebugLines wraps a list of DebugLine tables,
# each of which carries its include directories, file names and address rows.
DebugLines = namedtuple("DebugLines", ("lines",))
DebugLine = namedtuple(
    "DebugLine",
    ("include_directories", "file_names", "addresses"),
)
IncludeDirectory = namedtuple("IncludeDirectory", ("id", "path"))
FileName = namedtuple("FileName", ("id", "filename", "dir_idx"))

# Record returned by ast_to_asr: one filename list and one address list.
ASRDebugLines = namedtuple("ASRDebugLines", ("filenames", "addresses"))

class Parser:
    """
    Parser for the output generated by dwarfdump.

    On macOS (both Intel and ARM based):

        dwarfdump --debug-line src/bin/lfortran.dSYM > symbols.txt

    Then parse it using:

        p = Parser()
        ast = p.parse_file("symbols.txt")

    The parser is line oriented: `self.line` always holds the line most
    recently read from `self.file`, and `parse_debug_line` leaves it on the
    first line that it did not consume.
    """

    # Compiled once here instead of once per parsed line.
    _INCLUDE_DIRECTORY_RE = re.compile(
        r"include_directories\[[ ]*(\d+)\] = \"([^\"]+)\"")
    _FILE_INDEX_RE = re.compile(r"file_names\[[ ]*(\d+)\]:")
    _FILE_NAME_RE = re.compile(r"name: \"([^\"]+)\"")
    _DIR_INDEX_RE = re.compile(r"dir_index: (\d+)")

    def _next_line(self):
        """Read the next line into `self.line` ("" once the file is exhausted)."""
        self.line = self.file.readline()
        return self.line

    def parse_file(self, filename):
        """
        Parse the dwarfdump text file `filename`.

        Everything before the ".debug_line contents:" line is skipped; after
        it, one table is parsed for every line starting with "debug_line".

        Returns a DebugLines holding one DebugLine per table.

        Raises ValueError if the file ends before the ".debug_line contents:"
        line is found, or in the middle of a table prologue. The input file
        is closed when this method returns or raises.
        """
        with open(filename) as stream:
            self.file = stream
            self._next_line()
            while not self.line.startswith(".debug_line contents:"):
                # readline() returns "" only at end of file (blank lines are
                # "\n"), so without this check the scan would never stop.
                if not self.line:
                    raise ValueError(
                        "no '.debug_line contents:' section in %s" % filename)
                self._next_line()

            self._next_line()
            lines = []
            while self.line.startswith("debug_line"):
                lines.append(self.parse_debug_line())
        return DebugLines(lines)

    def parse_debug_line(self):
        """
        Parse one table; `self.line` must be on its "debug_line" line.

        Returns a DebugLine with the include directories, the file names and
        the address rows (as [address, line, column, file_id] lists).

        Raises ValueError if the file ends before any "include_directories"
        or "file_names" entry is seen, and IndexError if such an entry does
        not have the expected layout.
        """
        # Skip ahead to the first include directory; tables without any
        # include directory start directly with their file names.
        self._next_line()
        while not self.line.startswith(("include_directories", "file_names")):
            if not self.line:
                raise ValueError(
                    "unexpected end of file while reading a line table prologue")
            self._next_line()

        include_directories = []
        while self.line.startswith("include_directories"):
            n, path = self._INCLUDE_DIRECTORY_RE.findall(self.line)[0]
            include_directories.append(IncludeDirectory(int(n), path))
            self._next_line()

        file_names = []
        while self.line.startswith("file_names"):
            n = int(self._FILE_INDEX_RE.findall(self.line)[0])

            self._next_line()
            filename = self._FILE_NAME_RE.findall(self.line)[0]

            self._next_line()
            dir_idx = int(self._DIR_INDEX_RE.findall(self.line)[0])

            # The two lines after dir_index are read but not used
            # (presumably mod_time and length -- TODO confirm).
            self._next_line()
            self._next_line()

            file_names.append(FileName(n, filename, dir_idx))

            self._next_line()

        # Drop the current line and the two that follow it, landing on the
        # first address row (presumably a blank line, the column header and
        # its ruler -- TODO confirm against the dwarfdump output).
        self._next_line()
        self._next_line()
        self._next_line()

        addresses = []
        while self.line.startswith("0x"):
            # Only the first four columns are used; the rest are ignored.
            address, line, column, file_id = self.line.split()[:4]
            addresses.append(
                [int(address, base=16), int(line), int(column), int(file_id)])
            self._next_line()

        # Step over the line that ended the address rows so that the caller
        # sees the line after it.
        self._next_line()

        return DebugLine(include_directories, file_names, addresses)

def ast_to_asr(ast):
    """
    Flatten the parsed tables into one filename list and one address list.

    `ast` is the DebugLines returned by Parser.parse_file. The file ids of
    each table are renumbered into indices of a single global filename list,
    and every address row is rewritten to refer to that global index.

    Relative filenames are resolved against the *.cpp and *.h files found
    under the parent of the current working directory; names for which no
    such file exists are kept as they are.

    Returns an ASRDebugLines whose `filenames` is the global filename list
    and whose `addresses` is a list of [address, line_num, filename_index]
    entries. Rows with line number 0 are left out.

    Raises KeyError if a file refers to an unknown include directory or a
    row refers to an unknown file id, and AssertionError if the addresses
    are not in non-decreasing order across all tables.
    """
    local_files = [
        os.path.abspath(path)
        for path in glob("../**/*.cpp", recursive=True)
        + glob("../**/*.h", recursive=True)
    ]

    def make_abs(end_path):
        """Return the first local file whose path ends in `end_path`."""
        if end_path.startswith("/"):
            return end_path
        # Anchor the match on a path separator: a bare suffix test would let
        # "a.cpp" resolve to ".../ba.cpp".
        suffix = "/" + end_path
        for candidate in local_files:
            if candidate.endswith(suffix):
                return candidate
        return end_path

    last_address = -1
    global_filenames = []
    global_addresses = []
    for table in ast.lines:
        include_dirs = {inc.id: inc.path for inc in table.include_directories}

        # Maps the table-local file id to its index in global_filenames.
        filenames = {}
        for entry in table.file_names:
            prefix = ""
            # Directory index 0 gets no include directory prefix.
            if entry.dir_idx != 0:
                prefix = include_dirs[entry.dir_idx] + "/"
            filenames[entry.id] = len(global_filenames)
            global_filenames.append(make_abs(prefix + entry.filename))

        for address, line_num, _column, file_id in table.addresses:
            # Looked up for every row, so an unknown file id is reported
            # even on rows that are dropped below.
            global_id = filenames[file_id]
            assert last_address <= address, (
                "addresses are not sorted: 0x%x follows 0x%x"
                % (address, last_address))
            last_address = address
            if line_num != 0:
                global_addresses.append([address, line_num, global_id])
    return ASRDebugLines(global_filenames, global_addresses)


# Command line: sys.argv[1] is the dwarfdump text to read, sys.argv[2] the
# text file to write and sys.argv[3] the binary file to write.
p = Parser()
ast = p.parse_file(sys.argv[1])
asr = ast_to_asr(ast)

# Text output: the number of filenames, one filename per line, then the
# number of address entries (the entries themselves go to the binary file).
with open(sys.argv[2], "w") as text_out:
    text_out.write("%d\n" % len(asr.filenames))
    text_out.writelines(name + "\n" for name in asr.filenames)
    text_out.write("%d\n" % len(asr.addresses))

# Binary output: every entry is packed as three native unsigned 64-bit
# integers (address, line number, filename index).
with open(sys.argv[3], "wb") as binary_out:
    binary_out.writelines(
        pack("3Q", address, line_num, file_index)
        for address, line_num, file_index in asr.addresses
    )
